home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
SunSoft Catalyst CDWARE 1996 May to August
/
Catalyst CDWARE 1996 May to August.iso
/
.products
/
JavaWorld
/
javaworld
/
cgi-bin
/
jw-search.cgi
< prev
next >
Wrap
Text File
|
1996-02-14
|
6KB
|
253 lines
#!/usr/local/bin/perl
# jw-search - search engine for JavaWorld.
#
# $docroot is set to the absolute pathname of the JavaWorld
# content directory. This varies from mirror site to mirror site and
# must be changed appropriately.
#
# the search string comes in as the variable 'searchstring'.
# case-sensitivity is determined by the value of 'casesensitive' (it
# is either 'yes' or 'no').
# ---------------------------------------------------------------------------
# Site configuration.  These are package globals because the subroutines
# below (crawl, search, sendhome, ...) read them directly.
# ---------------------------------------------------------------------------
$docroot="/netra2/ns-home/javaworld";
$mailprog="/usr/lib/sendmail";
$bcc="jw-search\@javaworld.com"; # recipient of failsafe copy
$bccname="JW search";
$bccsubject="jwsearch";
$debug="/var/tmp/jw-search.$$";

# Read the POSTed form body.  A missing CONTENT_LENGTH is treated as an
# empty body instead of being passed to read() as undef.
$buffer = "";
read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'} || 0);

# Mail a record of the request (environment + raw body) before any parsing.
&sendhome();

# split the name-value pairs
@pairs = split(/&/,$buffer);
foreach $pair (@pairs) {
    next if $pair eq "";
    # The limit of 2 keeps any '=' that belongs to the value itself.
    ($name,$value) = split(/=/,$pair,2);
    $value = "" unless defined($value);
    $value =~ tr/+/ /;
    $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    # Stop people from using subshells to execute commands
    $value =~ s/~!/ ~!/g;
    $form{$name} = $value;
}

$origsearchstring = $form{'searchstring'};

# The search string is later used as a regular expression delimited by
# '/'.  Escape every bare '/', but step over existing backslash pairs so
# that input such as '\/' cannot turn our escape into an escaped backslash
# followed by a live delimiter.
# NOTE(review): the string is still treated as a user-supplied regex.
$form{'searchstring'} =~ s#(\\.)|/#defined($1) ? $1 : "\\/"#ges;

if ($form{'casesensitive'} eq "no") {
    $casestr = "case insensitive";
}
else {
    $casestr = "case sensitive";
}

# Builds $emptysearchstring and $nomatchesfound (needs $casestr and
# $origsearchstring, hence called only now).
&init_variables();

if ($form{'searchstring'} !~ /[\w]/) {
    print "$emptysearchstring";
    exit(0);
}

# Walk the document tree; matching files are counted in %hits.
&crawl("$docroot");

# A hash in boolean context is false when empty.  (defined(%hash) is a
# compile-time error on modern perls.)
if (!%hits) {
    print "$nomatchesfound";
    exit(0);
}

&dedupe_hits();

$matchnum = keys(%hits);
$matchstr = $matchnum == 1 ? "match" : "matches";

# HTML-escaped copy of the user's input for echoing into the page;
# $origsearchstring itself is left untouched.
$safesearchstring = defined($origsearchstring) ? $origsearchstring : "";
$safesearchstring =~ s/&/&amp;/g;
$safesearchstring =~ s/</&lt;/g;
$safesearchstring =~ s/>/&gt;/g;
$safesearchstring =~ s/"/&quot;/g;

# $page holds the complete response (CGI header + HTML document).
$page = "Content-TYPE: text/html\n\n<HTML>
<HEAD><TITLE>SunWorld Online Search Response</TITLE></HEAD>
<BODY>
<P>
<H2>Thank you for searching <EM>SunWorld Online</EM>. We appreciate
your patronage. Send any questions, comments, or suggestions to
<A HREF=\"/javaworld/cgi-bin/jw-mailto.cgi?webmaster\@javaworld.com\">webmaster\@javaworld.com</A>.
<P>
Search string ($casestr): <EM>$safesearchstring</EM>
<P>
$matchnum $matchstr found
<P>
Results (ranked by number of occurences of search string in document):</H2>
<P>
";

foreach $i (sort(by_hits_value keys(%hits))) {
    # Turn the filesystem path into a URL path by replacing the document
    # root prefix.  \Q...\E makes $docroot match literally, so no string
    # eval is needed.
    ($j = $i) =~ s#^\Q$docroot\E#/javaworld#;
    $page .= "<BR><STRONG>$hits{$i}</STRONG> <A HREF=\"$j\">$j</A>\n";
}
$page .= "<P>\n</BODY>\n</HTML>\n";

print "$page";
exit(0);
# search - count the lines of one file that match the user's search pattern.
#
#   $_[0]   path of the file to scan
#
# Reads  $form{'searchstring'} (used as a Perl regular expression) and
#        $form{'casesensitive'} ("no" selects case-insensitive matching).
# Writes $hits{<path>}, incremented once for every matching line.  Before
#        matching, anything of the form <...> on the line is removed.
#
# Files that cannot be opened, and patterns that do not compile, are
# skipped silently (no entry is added to %hits).
#
# The pattern is compiled with qr// rather than being pasted into a string
# eval: the user's text can therefore never run as Perl code, and '$' / '@'
# inside it are not interpolated as variables.
# NOTE(review): the pattern is still an arbitrary user-supplied regex, so a
# pathological expression can be slow; consider quotemeta() if plain-text
# search is all that is wanted.
sub search {
    my ($file) = @_;

    my $pattern = $form{'searchstring'};
    return unless defined($pattern) && $pattern ne "";

    my $ignore_case = defined($form{'casesensitive'})
                      && $form{'casesensitive'} eq "no";

    # Block eval only traps a regex syntax error; nothing is executed.
    my $regex = eval { $ignore_case ? qr/$pattern/i : qr/$pattern/ };
    return unless defined($regex);

    open(my $fh, '<', $file) || return;
    while (my $line = <$fh>) {
        $line =~ s/<[^<>]+>//g;          # drop HTML tags on this line
        $hits{$file}++ if $line =~ $regex;
    }
    close($fh);
}
# crawl - hand every *.html file at or below a path to &search.
#
#   $_[0]   a file or directory name
#
# A directory is read in full and then walked recursively ('.' and '..'
# are skipped); a plain file is searched only if its name ends in ".html".
# Anything else, and directories that cannot be opened, are ignored.
sub crawl {
    my $top = $_[0];

    # Not a directory: at most one file to look at.
    unless (-d $top) {
        &search($top) if -f $top && $top =~ /\.html$/;
        return;
    }

    opendir(my $dh, $top) || return;
    my @entries = readdir($dh);
    closedir($dh);

    foreach my $entry (@entries) {
        next if $entry eq "." || $entry eq "..";

        # Join and collapse runs of slashes into a single one.
        my $path = "$top/$entry";
        $path =~ s;[/]{2,};/;g;

        if (-d $path) {
            &crawl($path);
            next;
        }
        &search($path) if -f $path && $path =~ /\.html$/;
    }
}
# dedupe_hits - collapse hits that are different names for the same file.
#
# Reads and rewrites the global %hits (path => number of matching lines).
# Paths are grouped by the device and inode reported by stat(), so links to
# one file form one group.  From each group a single path is kept:
#   - a name that does not contain "index.html" is preferred;
#   - if every name in the group contains "index.html", one of them is
#     still kept, so a file reachable only as index.html stays in the results;
#   - among equal candidates the alphabetically first wins, which makes
#     the result independent of hash ordering.
# A path that can no longer be stat'ed is treated as its own group.
sub dedupe_hits {
    my (%names_for, %deduped);

    foreach my $path (keys %hits) {
        my ($dev, $ino) = stat($path);
        # Device and inode together identify a file; inode alone can
        # repeat across filesystems.
        my $id = defined($ino) ? "$dev:$ino" : "path:$path";
        push(@{ $names_for{$id} }, $path);
    }

    foreach my $id (keys %names_for) {
        my @names     = sort @{ $names_for{$id} };
        my @preferred = grep { $_ !~ /index\.html/ } @names;
        my $keep      = @preferred ? $preferred[0] : $names[0];
        $deduped{$keep} = $hits{$keep};
    }

    %hits = %deduped;
}
# by_number - sort comparator: ascending numeric order.
# Compares the sort variables $a and $b and yields -1, 0 or 1.
sub by_number {
    $a <=> $b;
}
# by_hits_value - sort comparator for keys of the global %hits.
# Orders keys by their hit count, highest first.  Keys with the same count
# are ordered by name, so the result list is the same on every run instead
# of following hash order.
sub by_hits_value {
    $hits{$b} <=> $hits{$a}
        or $a cmp $b;
}
# init_variables - build the two canned error responses.
#
# Reads  $casestr and $origsearchstring.
# Writes $emptysearchstring  (response for a search string with no word
#                             characters) and
#        $nomatchesfound     (response when nothing matched).
#
# Each response is a complete CGI reply: the Content-TYPE header, the
# blank line that ends the header section, then the HTML document.  The
# header terminator is written as an explicit "\n\n" so it cannot be lost
# when the file's blank lines are reformatted.
sub init_variables {
    my $header = "Content-TYPE: text/html\n\n";

    # The search string is user input: escape it before placing it in HTML.
    my $shown = defined($origsearchstring) ? $origsearchstring : "";
    $shown =~ s/&/&amp;/g;
    $shown =~ s/</&lt;/g;
    $shown =~ s/>/&gt;/g;
    $shown =~ s/"/&quot;/g;

    $emptysearchstring = $header . "<HTML>
<HEAD><TITLE>SunWorld Online Search Response</TITLE></HEAD>
<BODY>
<P>
<H2>
Thank you for attempting to search <EM>SunWorld Online</EM>. We
appreciate your patronage. However, an empty search string was
detected. If you really want to search <EM>SunWorld Online's</EM>
archives, hit your browser's \"back\" button and try again.
<P>
Send any questions, comments, or suggestions to
<A HREF=\"/cgi-bin/jw-mailto.cgi?webmaster\@javaworld.com\">webmaster\@javaworld.com</A>.
</H2>
</BODY>
</HTML>
";

    $nomatchesfound = $header . "<HTML>
<HEAD><TITLE>SunWorld Online Search Response</TITLE></HEAD>
<BODY>
<P>
<H2>
Alas, no matches to your search string ($casestr):
<P>
<EM>$shown</EM>
<P>
were found.
<P>
Thank you for searching <EM>SunWorld Online</EM>. We appreciate your patronage.
Send any questions, comments, or suggestions to
<A HREF=\"/cgi-bin/jw-mailto.cgi?webmaster\@javaworld.com\">webmaster\@javaworld.com</A>.
</H2>
</BODY>
</HTML>
";
}
# sendhome - mail a record of the current request to $bcc.
#
# Pipes a message into "$mailprog $bcc" containing, between BEGIN/END
# RECORD lines stamped with the output of date(1), a fixed list of CGI
# environment variables, @ARGV and the raw request body ($buffer).
# Returns quietly if the mail program cannot be started.
sub sendhome {
    $date = `date`;
    chop($date);                        # strip the trailing newline

    open(MAIL, "|$mailprog $bcc") || return;

    # Message headers, then the blank line that separates them from the body.
    print MAIL "From: $bcc ($bccname)\n",
               "To: $bcc\n",
               "Subject: $bccsubject\n\n";

    print MAIL "BEGIN RECORD $date\n";

    # One NAME=value line per variable, in this fixed order.
    foreach my $var (qw(
        CONTENT_LENGTH CONTENT_TYPE DOCUMENT_ROOT GATEWAY_INTERFACE
        HTTP_REFERER HTTP_USER_AGENT QUERY_STRING REMOTE_ADDR REMOTE_HOST
        REQUEST_METHOD SCRIPT_NAME SERVER_NAME SERVER_PORT SERVER_PROTOCOL
        SERVER_SOFTWARE
    )) {
        print MAIL "$var=$ENV{$var}\n";
    }

    print MAIL "ARGV=@ARGV\n",
               "STDINDATA=$buffer\n",
               "END RECORD $date\n";

    close(MAIL);
}